/*
 * Portions Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This software may be used and distributed according to the terms of the
 * GNU General Public License version 2.
 */

/*
 * manifest.c - manifest type that does on-demand parsing.
 *
 * Copyright 2015, Google Inc.
 *
 * This software may be used and distributed according to the terms of
 * the GNU General Public License, incorporated herein by reference.
 */
#define PY_SSIZE_T_CLEAN
#include <Python.h> // @manual=fbsource//third-party/python:python

#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "eden/scm/sapling/cext/charencode.h"
#include "eden/scm/sapling/cext/util.h"

#define DEFAULT_LINES 100000

typedef struct {
#ifdef IS_PY3K
  const char* start;
#else
  char* start;
#endif
  Py_ssize_t len; /* length of line including terminal newline */
  char hash_suffix;
  bool from_malloc;
  bool deleted;
} line;

typedef struct {
  PyObject_HEAD PyObject* pydata;
  line* lines;
  int numlines; /* number of line entries */
  int livelines; /* number of non-deleted lines */
  int maxlines; /* allocated number of lines */
  bool dirty;
} lazymanifest;

#define MANIFEST_OOM -1
#define MANIFEST_NOT_SORTED -2
#define MANIFEST_MALFORMED -3

/* get the length of the path for a line */
static size_t pathlen(line* l) {
  return strlen(l->start);
}

/* get the node value of a single line */
static PyObject* nodeof(line* l) {
  const char* s = l->start;
  ssize_t llen = pathlen(l);
  PyObject* hash = unhexlify(s + llen + 1, 40);
  if (!hash) {
    return NULL;
  }
  if (l->hash_suffix != '\0') {
    char newhash[21];
    memcpy(newhash, PyBytes_AsString(hash), 20);
    Py_DECREF(hash);
    newhash[20] = l->hash_suffix;
    hash = PyBytes_FromStringAndSize(newhash, 21);
  }
  return hash;
}

/* get the node hash and flags of a line as a tuple */
static PyObject* hashflags(line* l) {
  const char* s = l->start;
  size_t plen = pathlen(l);
  PyObject* hash = nodeof(l);

  /* 40 for hash, 1 for null byte, 1 for newline */
  size_t hplen = plen + 42;
  Py_ssize_t flen = l->len - hplen;
  PyObject* flags;
  PyObject* tup;

  if (!hash) {
    return NULL;
  }
#ifdef IS_PY3K
  flags = PyUnicode_FromStringAndSize(s + hplen - 1, flen);
#else
  flags = PyBytes_FromStringAndSize(s + hplen - 1, flen);
#endif
  if (!flags) {
    Py_DECREF(hash);
    return NULL;
  }
  tup = PyTuple_Pack(2, hash, flags);
  Py_DECREF(flags);
  Py_DECREF(hash);
  return tup;
}

/* if we're about to run out of space in the line index, add more */
static bool realloc_if_full(lazymanifest* self) {
  if (self->numlines == self->maxlines) {
    self->maxlines *= 2;
    self->lines = realloc(self->lines, self->maxlines * sizeof(line));
  }
  return !!self->lines;
}

/*
 * Find the line boundaries in the manifest that 'data' points to and store
 * information about each line in 'self'.
 */
static int find_lines(lazymanifest* self, char* data, Py_ssize_t len) {
  char* prev = NULL;
  while (len > 0) {
    line* l;
    char* next = memchr(data, '\n', len);
    if (!next) {
      return MANIFEST_MALFORMED;
    }
    next++; /* advance past newline */
    if (!realloc_if_full(self)) {
      return MANIFEST_OOM; /* no memory */
    }
    if (prev && strcmp(prev, data) > -1) {
      /* This data isn't sorted, so we have to abort. */
      return MANIFEST_NOT_SORTED;
    }
    l = self->lines + ((self->numlines)++);
    l->start = data;
    l->len = next - data;
    l->hash_suffix = '\0';
    l->from_malloc = false;
    l->deleted = false;
    len = len - l->len;
    prev = data;
    data = next;
  }
  self->livelines = self->numlines;
  return 0;
}

static int lazymanifest_init(lazymanifest* self, PyObject* args) {
  char* data;
  Py_ssize_t len;
  int err, ret;
  PyObject* pydata;
  if (!PyArg_ParseTuple(args, "S", &pydata)) {
    return -1;
  }
  err = PyBytes_AsStringAndSize(pydata, &data, &len);

  self->dirty = false;
  if (err == -1) {
    return -1;
  }
  self->pydata = pydata;
  Py_INCREF(self->pydata);
  Py_BEGIN_ALLOW_THREADS self->lines = malloc(DEFAULT_LINES * sizeof(line));
  self->maxlines = DEFAULT_LINES;
  self->numlines = 0;
  if (!self->lines) {
    ret = MANIFEST_OOM;
  } else {
    ret = find_lines(self, data, len);
  }
  Py_END_ALLOW_THREADS switch (ret) {
    case 0:
      break;
    case MANIFEST_OOM:
      PyErr_NoMemory();
      break;
    case MANIFEST_NOT_SORTED:
      PyErr_Format(PyExc_ValueError, "Manifest lines not in sorted order.");
      break;
    case MANIFEST_MALFORMED:
      PyErr_Format(PyExc_ValueError, "Manifest did not end in a newline.");
      break;
    default:
      PyErr_Format(PyExc_ValueError, "Unknown problem parsing manifest.");
  }
  return ret == 0 ? 0 : -1;
}

static void lazymanifest_dealloc(lazymanifest* self) {
  /* free any extra lines we had to allocate */
  int i;
  for (i = 0; i < self->numlines; i++) {
    if (self->lines[i].from_malloc) {
      free((void*)self->lines[i].start);
    }
  }
  if (self->lines) {
    free(self->lines);
    self->lines = NULL;
  }
  if (self->pydata) {
    Py_DECREF(self->pydata);
    self->pydata = NULL;
  }
  PyObject_Del(self);
}

/* iteration support */

typedef struct {
  PyObject_HEAD lazymanifest* m;
  Py_ssize_t pos;
} lmIter;

static void lmiter_dealloc(PyObject* o) {
  lmIter* self = (lmIter*)o;
  Py_DECREF(self->m);
  PyObject_Del(self);
}

static line* lmiter_nextline(lmIter* self) {
  do {
    self->pos++;
    if (self->pos >= self->m->numlines) {
      return NULL;
    }
    /* skip over deleted manifest entries */
  } while (self->m->lines[self->pos].deleted);
  return self->m->lines + self->pos;
}

static PyObject* lmiter_iterentriesnext(PyObject* o) {
  size_t pl;
  line* l;
  Py_ssize_t consumed;
  PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
  l = lmiter_nextline((lmIter*)o);
  if (!l) {
    goto done;
  }
  pl = pathlen(l);
#ifdef IS_PY3K
  path = PyUnicode_FromStringAndSize(l->start, pl);
#else
  path = PyBytes_FromStringAndSize(l->start, pl);
#endif
  hash = nodeof(l);
  consumed = pl + 41;
#ifdef IS_PY3K
  flags =
      PyUnicode_FromStringAndSize(l->start + consumed, l->len - consumed - 1);
#else
  flags = PyBytes_FromStringAndSize(l->start + consumed, l->len - consumed - 1);
#endif
  if (!path || !hash || !flags) {
    goto done;
  }
  ret = PyTuple_Pack(3, path, hash, flags);
done:
  Py_XDECREF(path);
  Py_XDECREF(hash);
  Py_XDECREF(flags);
  return ret;
}

#ifdef IS_PY3K
#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
#else
#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS \
  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER
#endif

static PyTypeObject lazymanifestEntriesIterator = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.lazymanifest.entriesiterator", /*tp_name */
    sizeof(lmIter), /*tp_basicsize */
    0, /*tp_itemsize */
    lmiter_dealloc, /*tp_dealloc */
    0, /*tp_print */
    0, /*tp_getattr */
    0, /*tp_setattr */
    0, /*tp_compare */
    0, /*tp_repr */
    0, /*tp_as_number */
    0, /*tp_as_sequence */
    0, /*tp_as_mapping */
    0, /*tp_hash */
    0, /*tp_call */
    0, /*tp_str */
    0, /*tp_getattro */
    0, /*tp_setattro */
    0, /*tp_as_buffer */
    LAZYMANIFESTENTRIESITERATOR_TPFLAGS, /* tp_flags */
    "Iterator for 3-tuples in a lazymanifest.", /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    PyObject_SelfIter, /* tp_iter: __iter__() method */
    lmiter_iterentriesnext, /* tp_iternext: next() method */
};

static PyObject* lmiter_iterkeysnext(PyObject* o) {
  size_t pl;
  line* l = lmiter_nextline((lmIter*)o);
  if (!l) {
    return NULL;
  }
  pl = pathlen(l);
#ifdef IS_PY3K
  return PyUnicode_FromStringAndSize(l->start, pl);
#else
  return PyBytes_FromStringAndSize(l->start, pl);
#endif
}

#ifdef IS_PY3K
#define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
#else
#define LAZYMANIFESTKEYSITERATOR_TPFLAGS \
  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER
#endif

static PyTypeObject lazymanifestKeysIterator = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.lazymanifest.keysiterator", /*tp_name */
    sizeof(lmIter), /*tp_basicsize */
    0, /*tp_itemsize */
    lmiter_dealloc, /*tp_dealloc */
    0, /*tp_print */
    0, /*tp_getattr */
    0, /*tp_setattr */
    0, /*tp_compare */
    0, /*tp_repr */
    0, /*tp_as_number */
    0, /*tp_as_sequence */
    0, /*tp_as_mapping */
    0, /*tp_hash */
    0, /*tp_call */
    0, /*tp_str */
    0, /*tp_getattro */
    0, /*tp_setattro */
    0, /*tp_as_buffer */
    LAZYMANIFESTKEYSITERATOR_TPFLAGS, /* tp_flags */
    "Keys iterator for a lazymanifest.", /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    PyObject_SelfIter, /* tp_iter: __iter__() method */
    lmiter_iterkeysnext, /* tp_iternext: next() method */
};

static lazymanifest* lazymanifest_copy(lazymanifest* self);

static PyObject* lazymanifest_getentriesiter(lazymanifest* self) {
  lmIter* i = NULL;
  lazymanifest* t = lazymanifest_copy(self);
  if (!t) {
    PyErr_NoMemory();
    return NULL;
  }
  i = PyObject_New(lmIter, &lazymanifestEntriesIterator);
  if (i) {
    i->m = t;
    i->pos = -1;
  } else {
    Py_DECREF(t);
    PyErr_NoMemory();
  }
  return (PyObject*)i;
}

static PyObject* lazymanifest_getkeysiter(lazymanifest* self) {
  lmIter* i = NULL;
  lazymanifest* t = lazymanifest_copy(self);
  if (!t) {
    PyErr_NoMemory();
    return NULL;
  }
  i = PyObject_New(lmIter, &lazymanifestKeysIterator);
  if (i) {
    i->m = t;
    i->pos = -1;
  } else {
    Py_DECREF(t);
    PyErr_NoMemory();
  }
  return (PyObject*)i;
}

/* __getitem__ and __setitem__ support */

static Py_ssize_t lazymanifest_size(lazymanifest* self) {
  return self->livelines;
}

static int linecmp(const void* left, const void* right) {
  return strcmp(((const line*)left)->start, ((const line*)right)->start);
}

static PyObject* lazymanifest_getitem(lazymanifest* self, PyObject* key) {
  line needle;
  line* hit;
#ifdef IS_PY3K
  if (!PyUnicode_Check(key)) {
    PyErr_Format(
        PyExc_TypeError, "getitem: manifest keys must be a unicode str.");
    return NULL;
  }
  needle.start = PyUnicode_AsUTF8(key);
#else
  if (!PyBytes_Check(key)) {
    PyErr_Format(PyExc_TypeError, "getitem: manifest keys must be a string.");
    return NULL;
  }
  needle.start = PyBytes_AsString(key);
#endif
  hit = bsearch(&needle, self->lines, self->numlines, sizeof(line), &linecmp);
  if (!hit || hit->deleted) {
    PyErr_Format(PyExc_KeyError, "No such manifest entry.");
    return NULL;
  }
  return hashflags(hit);
}

static int lazymanifest_delitem(lazymanifest* self, PyObject* key) {
  line needle;
  line* hit;
#ifdef IS_PY3K
  if (!PyUnicode_Check(key)) {
    PyErr_Format(PyExc_TypeError, "delitem: manifest keys must be a str.");
    return -1;
  }
  needle.start = PyUnicode_AsUTF8(key);
#else
  if (!PyBytes_Check(key)) {
    PyErr_Format(PyExc_TypeError, "delitem: manifest keys must be a string.");
    return -1;
  }
  needle.start = PyBytes_AsString(key);
#endif
  hit = bsearch(&needle, self->lines, self->numlines, sizeof(line), &linecmp);
  if (!hit || hit->deleted) {
    PyErr_Format(PyExc_KeyError, "Tried to delete nonexistent manifest entry.");
    return -1;
  }
  self->dirty = true;
  hit->deleted = true;
  self->livelines--;
  return 0;
}

/* Do a binary search for the insertion point for new, creating the
 * new entry if needed. */
static int internalsetitem(lazymanifest* self, line* new) {
  int start = 0, end = self->numlines;
  while (start < end) {
    int pos = start + (end - start) / 2;
    int c = linecmp(new, self->lines + pos);
    if (c < 0) {
      end = pos;
    } else if (c > 0) {
      start = pos + 1;
    } else {
      if (self->lines[pos].deleted) {
        self->livelines++;
      }
      if (self->lines[pos].from_malloc) {
        free((void*)self->lines[pos].start);
      }
      start = pos;
      goto finish;
    }
  }
  /* being here means we need to do an insert */
  if (!realloc_if_full(self)) {
    PyErr_NoMemory();
    return -1;
  }
  memmove(
      self->lines + start + 1,
      self->lines + start,
      (self->numlines - start) * sizeof(line));
  self->numlines++;
  self->livelines++;
finish:
  self->lines[start] = *new;
  self->dirty = true;
  return 0;
}

static int
lazymanifest_setitem(lazymanifest* self, PyObject* key, PyObject* value) {
  const char* path;
  Py_ssize_t plen;
  PyObject* pyhash;
  Py_ssize_t hlen;
  char* hash;
  PyObject* pyflags;
  const char* flags;
  Py_ssize_t flen;
  size_t dlen;
  char* dest;
  int i;
  line new;
#ifdef IS_PY3K
  if (!PyUnicode_Check(key)) {
    PyErr_Format(PyExc_TypeError, "setitem: manifest keys must be a str.");
    return -1;
  }
#else
  if (!PyBytes_Check(key)) {
    PyErr_Format(PyExc_TypeError, "setitem: manifest keys must be a str.");
    return -1;
  }
#endif
  if (!value) {
    return lazymanifest_delitem(self, key);
  }
  if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
    PyErr_Format(
        PyExc_TypeError, "Manifest values must be a tuple of (node, flags).");
    return -1;
  }
#ifdef IS_PY3K
  path = PyUnicode_AsUTF8AndSize(key, &plen);
#else
  if (PyBytes_AsStringAndSize(key, (char**)&path, &plen) == -1) {
    return -1;
  }
#endif
  pyhash = PyTuple_GetItem(value, 0);
  if (!PyBytes_Check(pyhash)) {
    PyErr_Format(PyExc_TypeError, "node must be a 20-byte bytes");
    return -1;
  }
  hlen = PyBytes_Size(pyhash);
  /* Some parts of the codebase try and set 21 or 22
   * byte "hash" values in order to perturb things for
   * status. We have to preserve at least the 21st
   * byte. Sigh. If there's a 22nd byte, we drop it on
   * the floor, which works fine.
   */
  if (hlen != 20 && hlen != 21 && hlen != 22) {
    PyErr_Format(PyExc_TypeError, "node must be a 20-byte bytes");
    return -1;
  }
  hash = PyBytes_AsString(pyhash);

  pyflags = PyTuple_GetItem(value, 1);
#ifdef IS_PY3K
  if (!PyUnicode_Check(pyflags)) {
    PyErr_Format(PyExc_TypeError, "flags must be a str");
    return -1;
  }
  flags = PyUnicode_AsUTF8AndSize(pyflags, &flen);
  if (flen > 1) {
    PyErr_Format(PyExc_TypeError, "len(flags) must be <= 1");
    return -1;
  }
#else
  if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) {
    PyErr_Format(PyExc_TypeError, "flags must a 0 or 1 byte string");
    return -1;
  }
  if (PyBytes_AsStringAndSize(pyflags, (char**)&flags, &flen) == -1) {
    return -1;
  }
#endif
  /* one null byte and one newline */
  dlen = plen + 41 + flen + 1;
  dest = malloc(dlen);
  if (!dest) {
    PyErr_NoMemory();
    return -1;
  }
  memcpy(dest, path, plen + 1);
  for (i = 0; i < 20; i++) {
    /* Cast to unsigned, so it will not get sign-extended when promoted
     * to int (as is done when passing to a variadic function)
     */
    sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]);
  }
  memcpy(dest + plen + 41, flags, flen);
  dest[plen + 41 + flen] = '\n';
  new.start = dest;
  new.len = dlen;
  new.hash_suffix = '\0';
  if (hlen > 20) {
    new.hash_suffix = hash[20];
  }
  new.from_malloc = true; /* is `start` a pointer we allocated? */
  new.deleted = false; /* is this entry deleted? */
  if (internalsetitem(self, &new)) {
    return -1;
  }
  return 0;
}

static PyMappingMethods lazymanifest_mapping_methods = {
    (lenfunc)lazymanifest_size, /* mp_length */
    (binaryfunc)lazymanifest_getitem, /* mp_subscript */
    (objobjargproc)lazymanifest_setitem, /* mp_ass_subscript */
};

/* sequence methods (important or __contains__ builds an iterator) */

static int lazymanifest_contains(lazymanifest* self, PyObject* key) {
  line needle;
  line* hit;
  if (
#ifdef IS_PY3K
      !PyUnicode_Check(key)
#else
      !PyBytes_Check(key)
#endif
  ) {
    /* Our keys are always strings, so if the contains
     * check is for a non-string, just return false. */
    return 0;
  }
#ifdef IS_PY3K
  needle.start = PyUnicode_AsUTF8(key);
#else
  needle.start = PyBytes_AsString(key);
#endif
  hit = bsearch(&needle, self->lines, self->numlines, sizeof(line), &linecmp);
  if (!hit || hit->deleted) {
    return 0;
  }
  return 1;
}

static PySequenceMethods lazymanifest_seq_meths = {
    (lenfunc)lazymanifest_size, /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    0, /* sq_item */
    0, /* sq_slice */
    0, /* sq_ass_item */
    0, /* sq_ass_slice */
    (objobjproc)lazymanifest_contains, /* sq_contains */
    0, /* sq_inplace_concat */
    0, /* sq_inplace_repeat */
};

/* Other methods (copy, diff, etc) */
static PyTypeObject lazymanifestType;

/* If the manifest has changes, build the new manifest text and reindex it. */
static int compact(lazymanifest* self) {
  int i;
  ssize_t need = 0;
  char* data;
  line *src, *dst;
  PyObject* pydata;
  if (!self->dirty) {
    return 0;
  }
  for (i = 0; i < self->numlines; i++) {
    if (!self->lines[i].deleted) {
      need += self->lines[i].len;
    }
  }
  pydata = PyBytes_FromStringAndSize(NULL, need);
  if (!pydata) {
    return -1;
  }
  data = PyBytes_AsString(pydata);
  if (!data) {
    return -1;
  }
  src = self->lines;
  dst = self->lines;
  for (i = 0; i < self->numlines; i++, src++) {
    const char* tofree = NULL;
    if (src->from_malloc) {
      tofree = src->start;
    }
    if (!src->deleted) {
      memcpy(data, src->start, src->len);
      *dst = *src;
      dst->start = data;
      dst->from_malloc = false;
      data += dst->len;
      dst++;
    }
    free((void*)tofree);
  }
  Py_DECREF(self->pydata);
  self->pydata = pydata;
  self->numlines = self->livelines;
  self->dirty = false;
  return 0;
}

static PyObject* lazymanifest_text(lazymanifest* self) {
  if (compact(self) != 0) {
    PyErr_NoMemory();
    return NULL;
  }
  Py_INCREF(self->pydata);
  return self->pydata;
}

static lazymanifest* lazymanifest_copy(lazymanifest* self) {
  lazymanifest* copy = NULL;
  if (compact(self) != 0) {
    goto nomem;
  }
  copy = PyObject_New(lazymanifest, &lazymanifestType);
  if (!copy) {
    goto nomem;
  }
  copy->numlines = self->numlines;
  copy->livelines = self->livelines;
  copy->dirty = false;
  copy->lines = malloc(self->maxlines * sizeof(line));
  if (!copy->lines) {
    goto nomem;
  }
  memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
  copy->maxlines = self->maxlines;
  copy->pydata = self->pydata;
  Py_INCREF(copy->pydata);
  return copy;
nomem:
  PyErr_NoMemory();
  Py_XDECREF(copy);
  return NULL;
}

static lazymanifest* lazymanifest_filtercopy(
    lazymanifest* self,
    PyObject* matchfn) {
  lazymanifest* copy = NULL;
  int i;
  if (!PyCallable_Check(matchfn)) {
    PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
    return NULL;
  }
  /* compact ourselves first to avoid double-frees later when we
   * compact tmp so that it doesn't have random pointers to our
   * underlying from_malloc-data (self->pydata is safe) */
  if (compact(self) != 0) {
    goto nomem;
  }
  copy = PyObject_New(lazymanifest, &lazymanifestType);
  if (!copy) {
    goto nomem;
  }
  copy->dirty = true;
  copy->lines = malloc(self->maxlines * sizeof(line));
  if (!copy->lines) {
    goto nomem;
  }
  copy->maxlines = self->maxlines;
  copy->numlines = 0;
  copy->pydata = self->pydata;
  Py_INCREF(self->pydata);
  for (i = 0; i < self->numlines; i++) {
    PyObject *arglist = NULL, *result = NULL;
    arglist = Py_BuildValue("(s)", self->lines[i].start);
    if (!arglist) {
      return NULL;
    }
    result = PyObject_CallObject(matchfn, arglist);
    Py_DECREF(arglist);
    /* if the callback raised an exception, just let it
     * through and give up */
    if (!result) {
      free(copy->lines);
      Py_DECREF(self->pydata);
      return NULL;
    }
    if (PyObject_IsTrue(result)) {
      assert(!(self->lines[i].from_malloc));
      copy->lines[copy->numlines++] = self->lines[i];
    }
    Py_DECREF(result);
  }
  copy->livelines = copy->numlines;
  return copy;
nomem:
  PyErr_NoMemory();
  Py_XDECREF(copy);
  return NULL;
}

static PyObject* lazymanifest_diff(lazymanifest* self, PyObject* args) {
  lazymanifest* other;
  PyObject* pyclean = NULL;
  bool listclean;
  PyObject *emptyTup = NULL, *ret = NULL;
  PyObject* es;
  int sneedle = 0, oneedle = 0;
  if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
    return NULL;
  }
  listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
#ifdef IS_PY3K
  es = PyUnicode_FromString("");
#else
  es = PyBytes_FromString("");
#endif
  if (!es) {
    goto nomem;
  }
  emptyTup = PyTuple_Pack(2, Py_None, es);
  Py_DECREF(es);
  if (!emptyTup) {
    goto nomem;
  }
  ret = PyDict_New();
  if (!ret) {
    goto nomem;
  }
  while (sneedle != self->numlines || oneedle != other->numlines) {
    line* left = self->lines + sneedle;
    line* right = other->lines + oneedle;
    int result;
    PyObject* key;
    PyObject* outer;
    /* If we're looking at a deleted entry and it's not
     * the end of the manifest, just skip it. */
    if (sneedle < self->numlines && left->deleted) {
      sneedle++;
      continue;
    }
    if (oneedle < other->numlines && right->deleted) {
      oneedle++;
      continue;
    }
    /* if we're at the end of either manifest, then we
     * know the remaining items are adds so we can skip
     * the strcmp. */
    if (sneedle == self->numlines) {
      result = 1;
    } else if (oneedle == other->numlines) {
      result = -1;
    } else {
      result = linecmp(left, right);
    }
#ifdef IS_PY3K
    key = result <= 0 ? PyUnicode_FromString(left->start)
                      : PyUnicode_FromString(right->start);
#else
    key = result <= 0 ? PyBytes_FromString(left->start)
                      : PyBytes_FromString(right->start);
#endif
    if (!key) {
      goto nomem;
    }
    if (result < 0) {
      PyObject* l = hashflags(left);
      if (!l) {
        goto nomem;
      }
      outer = PyTuple_Pack(2, l, emptyTup);
      Py_DECREF(l);
      if (!outer) {
        goto nomem;
      }
      PyDict_SetItem(ret, key, outer);
      Py_DECREF(outer);
      sneedle++;
    } else if (result > 0) {
      PyObject* r = hashflags(right);
      if (!r) {
        goto nomem;
      }
      outer = PyTuple_Pack(2, emptyTup, r);
      Py_DECREF(r);
      if (!outer) {
        goto nomem;
      }
      PyDict_SetItem(ret, key, outer);
      Py_DECREF(outer);
      oneedle++;
    } else {
      /* file exists in both manifests */
      if (left->len != right->len ||
          memcmp(left->start, right->start, left->len) ||
          left->hash_suffix != right->hash_suffix) {
        PyObject* l = hashflags(left);
        PyObject* r;
        if (!l) {
          goto nomem;
        }
        r = hashflags(right);
        if (!r) {
          Py_DECREF(l);
          goto nomem;
        }
        outer = PyTuple_Pack(2, l, r);
        Py_DECREF(l);
        Py_DECREF(r);
        if (!outer) {
          goto nomem;
        }
        PyDict_SetItem(ret, key, outer);
        Py_DECREF(outer);
      } else if (listclean) {
        PyDict_SetItem(ret, key, Py_None);
      }
      sneedle++;
      oneedle++;
    }
    Py_DECREF(key);
  }
  Py_DECREF(emptyTup);
  return ret;
nomem:
  PyErr_NoMemory();
  Py_XDECREF(ret);
  Py_XDECREF(emptyTup);
  return NULL;
}

static PyMethodDef lazymanifest_methods[] = {
    {"keys",
     (PyCFunction)lazymanifest_getkeysiter,
     METH_NOARGS,
     "Iterate over file names in this lazymanifest."},
    {"iterkeys",
     (PyCFunction)lazymanifest_getkeysiter,
     METH_NOARGS,
     "Iterate over file names in this lazymanifest."},
    {"iterentries",
     (PyCFunction)lazymanifest_getentriesiter,
     METH_NOARGS,
     "Iterate over (path, nodeid, flags) tuples in this lazymanifest."},
    {"copy",
     (PyCFunction)lazymanifest_copy,
     METH_NOARGS,
     "Make a copy of this lazymanifest."},
    {"filtercopy",
     (PyCFunction)lazymanifest_filtercopy,
     METH_O,
     "Make a copy of this manifest filtered by matchfn."},
    {"diff",
     (PyCFunction)lazymanifest_diff,
     METH_VARARGS,
     "Compare this lazymanifest to another one."},
    {"text",
     (PyCFunction)lazymanifest_text,
     METH_NOARGS,
     "Encode this manifest to text."},
    {NULL},
};

#ifdef IS_PY3K
#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT
#else
#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN
#endif

static PyTypeObject lazymanifestType = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.lazymanifest", /* tp_name */
    sizeof(lazymanifest), /* tp_basicsize */
    0, /* tp_itemsize */
    (destructor)lazymanifest_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    0, /* tp_repr */
    0, /* tp_as_number */
    &lazymanifest_seq_meths, /* tp_as_sequence */
    &lazymanifest_mapping_methods, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    0, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    LAZYMANIFEST_TPFLAGS, /* tp_flags */
    "TODO(augie)", /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    (getiterfunc)lazymanifest_getkeysiter, /* tp_iter */
    0, /* tp_iternext */
    lazymanifest_methods, /* tp_methods */
    0, /* tp_members */
    0, /* tp_getset */
    0, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    (initproc)lazymanifest_init, /* tp_init */
    0, /* tp_alloc */
};

void manifest_module_init(PyObject* mod) {
  lazymanifestType.tp_new = PyType_GenericNew;
  if (PyType_Ready(&lazymanifestType) < 0) {
    return;
  }
  Py_INCREF(&lazymanifestType);

  PyModule_AddObject(mod, "lazymanifest", (PyObject*)&lazymanifestType);
}
